library(tictoc)
library(ggplot2)
library(dplyr)
library(data.table)
names_functions = list.files(here::here("functions"))
for (f in names_functions)
source(here::here("functions", f))
rm(f, names_functions)Export and plot results from MCMC output
This notebook loads the MCMC output from the script estimate_models.r and calculates
- the probability of each party winning the election
- the probability of each party winning the electoral votes in the different states
- the posterior density of the expected vote shares of each party on election day
- the posterior distribution of the expected vote shares over time at the state, regional and national level
Note that the plots below show the results for the entire election campaign, i.e. up until election day. The results written to csv are filtered to only include win probabilities and mean expected vote shares until the last day for which polls are available in a given scenario.
Libraries, functions
Load “global” variables
parties <- load_parties()
states <- load_states()
regions <- load_regions()
states_regions <- load_dataland_states_regions()
n_parties_by_state <- load_n_parties_by_geography("state")
dates_campaign <- load_dates_election_campaign(year = 2024)
electoral_votes <- load_electoral_votes()
election_day <- load_election_day()Load priors -> visualize along with posterior distribution of \pi_T
priors <- readRDS(here::here("priors", "priors.Rds"))Generate results for different scenarios
scenarios <- load_scenarios()List to store plots
plts <- list()Define a function to generate csv output and plots for each scenario
gen_results <- function(plts, scen) {
print(scen)
out <- readRDS(here::here("model", paste0("mcmc_out_", scen, ".Rds")))
df_polls <- read.csv(here::here("data", paste0("df_polls_", scen, ".csv")))
df_polls$date <- as.Date(df_polls$date)
# make non-zero polls more visible in plots
df_polls$value = ifelse(df_polls$value == 0,
NA,
df_polls$value)
df_prior_ppi <- invert_alr_on_prior(priors[[scen]][["m_mmu_T"]])
# use data.table -> much, much quicker than tidyverse!
df_draws_ppi <- convert_draws_to_dt(
rstan::extract(out, pars = "ppi")[["ppi"]],
geographies = states,
parties = parties,
dates_campaign = dates_campaign)
df_draws_ppi_reg <- convert_draws_to_dt(
rstan::extract(out, pars = "ppi_reg")[["ppi_reg"]],
geographies = regions,
parties = parties,
dates_campaign = dates_campaign)
df_draws_ppi_nat <- convert_draws_to_dt(
rstan::extract(out, pars = "ppi_nat")[["ppi_nat"]],
geographies = "national",
parties = parties,
dates_campaign = dates_campaign)
tic("calc prob win election")
df_prob_win_election <- do.call(
"rbind",
lapply(
dates_campaign,
FUN = calc_prob_win_election,
df_draws_ppi = df_draws_ppi,
df_draws_ppi_nat = df_draws_ppi_nat,
states = states,
parties = parties,
electoral_votes = electoral_votes
)
)
toc()
tic("calc prob win states")
df_prob_win_states <- do.call(
"rbind",
lapply(
dates_campaign,
FUN = calc_prob_win_states,
df_draws_ppi = df_draws_ppi,
states = states,
parties = parties
)
)
toc()
# Export mean vote share and win probabilities to read
tic("calc mean vote states")
df_draws_ppi %>%
group_by(t, party, geography) %>%
summarise(mean_vote_share = mean(values)) %>%
rename(date = t, province = geography) %>%
mutate(party = tolower(party)) %>%
tidyr::pivot_wider(
names_from = "party",
values_from = "mean_vote_share",
names_glue = "{party}_{.value}") -> df_out_mean
toc()
df_prob_win_states %>%
mutate(party = tolower(party)) %>%
rename(province = geography) %>%
tidyr::pivot_wider(
names_from = "party",
values_from = "prob_win",
names_glue = "{party}_{.value}") -> df_out_prob_win_states
write.csv(
merge(
# only export until day of latest available poll
filter(df_out_mean, date <= max(df_polls$date)),
filter(df_out_prob_win_states, date <= max(df_polls$date)),
by = c("date", "province")
),
file = here::here(paste0("provincial_forecast_", scen, ".csv")),
row.names = FALSE
)
tic("calc mean vote share national")
df_draws_ppi_nat %>%
select(-geography) %>%
group_by(t, party) %>%
summarise(mean_vote_share = mean(values)) %>%
rename(date = t) %>%
mutate(party = tolower(party)) %>%
tidyr::pivot_wider(
names_from = "party",
values_from = "mean_vote_share",
names_glue = "{party}_{.value}") -> df_out_mean_nat
toc()
tic("transform win prob election for export")
df_prob_win_election %>%
mutate(party = tolower(party)) %>%
tidyr::pivot_wider(
names_from = "party",
values_from = "prob_win",
names_glue = "{party}_{.value}") -> df_out_prob_win_election
toc()
write.csv(
merge(
# only export until day of latest available poll
filter(df_out_mean_nat, date <= max(df_polls$date)),
filter(df_out_prob_win_election, date <= max(df_polls$date)),
by = "date"
),
file = here::here(paste0("national_forecast_", scen, ".csv")),
row.names = FALSE
)
# Plots
plot_prob_win_election(
df_prob_win_election,
election_day,
scen) -> plts[[scen]][["plt_prob_win_election"]]
tic("plot win prob election over time")
plot_prob_win_election_over_time(
df_prob_win_election,
scen) -> plts[[scen]][["plt_prob_win_election_over_time"]]
toc()
plot_prob_win_states(
df_prob_win_states,
election_day,
scen) -> plts[[scen]][["plt_prob_win_states"]]
plot_prob_win_states_over_time(
df_prob_win_states,
scen) -> plts[[scen]][["plt_prob_win_states_over_time"]]
plot_ppiT(
df_draws_ppi,
df_prior_ppi,
election_day,
n_geographies = length(states),
scen) -> plts[[scen]][["plt_ppiT"]]
plot_ppi(
df_draws_ppi,
filter(df_polls, scenario == scen),
n_geographies = length(states),
type_of_poll = "state",
plt_title_prefix = scen) -> plts[[scen]][["plt_ppi"]]
plot_ppi(
df_draws_ppi_reg,
filter(df_polls, scenario == scen),
n_geographies = length(regions),
type_of_poll = "regional",
plt_title_prefix = scen) -> plts[[scen]][["plt_ppi_reg"]]
plot_ppi(
df_draws_ppi_nat,
filter(df_polls, scenario == scen),
n_geographies = 1,
type_of_poll = "national",
plt_title_prefix = scen) -> plts[[scen]][["plt_ppi_nat"]]
rm(df_draws_ppi,
df_draws_ppi_reg,
df_draws_ppi_nat,
df_polls,
df_prior_ppi,
df_prob_win_election,
df_prob_win_states,
df_out_prob_win_election,
df_out_prob_win_states,
df_out_mean,
df_out_mean_nat,
out)
plts
}Loop over scenarios
for (scen in scenarios) {
plts <- gen_results(plts, scen)
}[1] "A"
calc prob win election: 210.75 sec elapsed
calc prob win states: 194.59 sec elapsed
calc mean vote states: 0.39 sec elapsed
calc mean vote share national: 0.07 sec elapsed
transform win prob election for export: 0.01 sec elapsed
plot win prob election over time: 0.02 sec elapsed
[1] "B"
calc prob win election: 315.86 sec elapsed
calc prob win states: 227.36 sec elapsed
calc mean vote states: 0.53 sec elapsed
calc mean vote share national: 0.08 sec elapsed
transform win prob election for export: 0.01 sec elapsed
plot win prob election over time: 0.04 sec elapsed
[1] "C"
calc prob win election: 278.38 sec elapsed
calc prob win states: 252.42 sec elapsed
calc mean vote states: 0.47 sec elapsed
calc mean vote share national: 0.11 sec elapsed
transform win prob election for export: 0.01 sec elapsed
plot win prob election over time: 0.02 sec elapsed
[1] "D"
calc prob win election: 324.98 sec elapsed
calc prob win states: 195.61 sec elapsed
calc mean vote states: 0.56 sec elapsed
calc mean vote share national: 0.1 sec elapsed
transform win prob election for export: 0.03 sec elapsed
plot win prob election over time: 0.02 sec elapsed
[1] "E"
calc prob win election: 216.69 sec elapsed
calc prob win states: 282.51 sec elapsed
calc mean vote states: 0.72 sec elapsed
calc mean vote share national: 0.13 sec elapsed
transform win prob election for export: 0.01 sec elapsed
plot win prob election over time: 0.02 sec elapsed